library(stringr)
library(feather)
library(koRpus)
library(tidyverse)
library(lme4)
library(dplyr)
library(sjPlot)
library(corrplot)
library(tidytext)
library(tm)
library(childesr)
# Load data ----
# CHILDES measures, kept only for ages 14-58 so that the age range lines up
# with that of LDP.
childes_all <- read_feather(
  path = "/Users/Yawen/Desktop/lexical diversity/triaL6_childes/childes_all.feather"
) %>%
  filter(age >= 14, age <= 58)
# LDP measures, read as stored (no age filter is applied at load time).
ldp_all <- read_feather(
  path = "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/ldp_all.feather"
)
# Plot growth curves ----
# CHILDES: one smoothed trend per child measure. Every measure goes through
# scale() inside aes() so the curves can share a single y axis. The x mapping
# is declared once in ggplot() and inherited by each layer.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mlu), color = "mlu"), se = FALSE) +
  theme_classic() +
  labs(
    title = "CHILDES: Growth Curve of Lexical Diversity",
    subtitle = "14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# Compare with CDI ----
# LDP rows aged 14-30 months: smoothed, z-scored trends of the child
# lexical-diversity measures drawn next to the CDI score.
# The subtitle now states the same 14-30 window that the filter keeps
# (it previously said 18). The former group_by(subject) step was dropped:
# none of the layers map the grouping, so it had no effect on the plot.
ldp_all %>%
  filter(age >= 14 & age <= 30) %>%
  ggplot() +
  geom_smooth(aes(x = age, y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mlu), color = "mlu"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(cdi), color = "CDI"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with CDI",
    subtitle = "LDP: 14 ~ 30 Months",
    y = "lexical diversity (scaled)"
  )
# Compare with PPVT ----
# LDP rows aged 30-53 months: smoothed, z-scored trends of the child
# lexical-diversity measures drawn next to the PPVT score.
# The former group_by(subject) step was dropped: none of the layers map the
# grouping, so it had no effect on the plot and was never ungrouped.
ldp_all %>%
  filter(age >= 30 & age <= 53) %>%
  ggplot() +
  geom_smooth(aes(x = age, y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mlu), color = "mlu"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(x = age, y = scale(ppvt), color = "PPVT"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with PPVT",
    subtitle = "LDP: 30 ~ 53 Months",
    y = "lexical diversity (scaled)"
  )
# TTR vs MATTR ----
# CHILDES: z-scored child TTR and MATTR trends over age.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Growth Curve by TTR & MATTR",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# LDP: the same two measures. ldp_all is plotted without any age filter here.
ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by TTR & MATTR",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )
# MTLD vs vocd-D (Kid) ----
# Child MTLD and vocd-D trends over age, plotted on their raw values.
# The y label no longer says "(scaled)", because these layers do not call
# scale().
# CHILDES
childes_all %>%
  ggplot() +
  geom_smooth(aes(x = age, y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity"
  )
# LDP
ldp_all %>%
  ggplot() +
  geom_smooth(aes(x = age, y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity"
  )
# MTLD vs vocd-D (Mother) ----
# Mother MTLD and vocd-D trends over child age, plotted on their raw values.
# The y label no longer says "(scaled)", because these layers do not call
# scale().
# CHILDES
childes_all %>%
  ggplot() +
  geom_smooth(aes(x = age, y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity"
  )
# LDP
ldp_all %>%
  ggplot() +
  geom_smooth(aes(x = age, y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(x = age, y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity"
  )
# Get parameters of children and mothers in LDP data
# Get parameters of children and mothers in CHILDES data
# Tables holding the *_intercept / *_slope columns summarised below.
# NOTE(review): the CHILDES table is also read from the trial5_ldp folder;
# confirm that this is the intended location.
childes_intercept <- read_feather(
  path = "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/childes_intercept.feather"
)
ldp_intercept <- read_feather(
  path = "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/ldp_intercept.feather"
)
# Variance of children's intercept ----
# LDP: stack the child intercept columns into long form (measure, value) and
# report mean, SD and coefficient of variation (sd / mean) per measure.
ldp_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 8 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_intercept 497.7072271 151.15746724 0.30370760
## 2 mattr_intercept 0.4161320 0.04073419 0.09788765
## 3 mlu_intercept 2.3510246 0.35195918 0.14970459
## 4 mtld_intercept 12.5653449 2.31253102 0.18404039
## 5 ppvt_intercept 27.4537313 10.91072920 0.39742245
## 6 sen_intercept 19.2570358 11.28566953 0.58605435
## 7 ttr_intercept 0.1935208 0.02725039 0.14081376
## 8 vocd_intercept 29.2178591 1.75218981 0.05996982
# CHILDES: same summary over the five intercept columns gathered here.
childes_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_intercept 0.5557397 0.04477623 0.08057051
## 2 mlu_intercept 3.4973603 0.68263689 0.19518632
## 3 mtld_intercept 16.1612760 4.53308856 0.28049076
## 4 ttr_intercept 0.2766605 0.12602093 0.45550755
## 5 vocd_intercept 30.0039552 2.51306536 0.08375780
# Histograms of children's intercepts, one facet per measure with a free
# x axis because the measures live on very different scales.
# The LDP title now reads "Variance of Children's Intercept", matching the
# CHILDES twin and the other histogram titles in this script (it previously
# read "coef_of_var of Children's Intercept").
# LDP
ldp_intercept %>%
  gather(
    measure, value,
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )
# CHILDES
childes_intercept %>%
  ungroup(.) %>%
  gather(
    measure, value,
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# Variance of children's slope ----
# LDP: stack the child slope columns into long form (measure, value) and
# report mean, SD and coefficient of variation (sd / mean) per measure.
ldp_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope, sen_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 8 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_slope 834.31718193 130.35334414 0.15623955
## 2 mattr_slope 0.22697313 0.05004736 0.22049906
## 3 mlu_slope 2.35370735 0.22224041 0.09442143
## 4 mtld_slope 17.93810450 3.00603839 0.16757837
## 5 ppvt_slope 77.49354350 18.14881161 0.23419773
## 6 sen_slope 39.00077437 19.07906158 0.48919699
## 7 ttr_slope -0.02935315 0.04614579 -1.57208962
## 8 vocd_slope 10.28252580 2.88139891 0.28022287
# CHILDES: same summary over the five slope columns gathered here.
childes_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope, mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_slope 0.2204807 0.09766974 0.4429854
## 2 mlu_slope 1.5431814 0.78996935 0.5119096
## 3 mtld_slope 19.8912649 10.96952000 0.5514742
## 4 ttr_slope -0.0844668 0.10919178 -1.2927183
## 5 vocd_slope 10.3148411 7.57719236 0.7345913
# Histograms of children's slopes, one facet per measure with a free x axis.
# LDP
ldp_intercept %>%
  gather(
    key = measure, value = value,
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope, sen_slope
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )
# CHILDES
childes_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope, mlu_slope
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# Variance of mother's intercept ----
# LDP: stack the mother intercept columns into long form (measure, value) and
# report mean, SD and coefficient of variation (sd / mean) per measure.
ldp_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.5618204 0.02592131 0.04613806
## 2 mom_mlu_intercept 4.0964952 0.40832536 0.09967676
## 3 mom_mtld_intercept 31.2723507 5.19678762 0.16617835
## 4 mom_ttr_intercept 0.1581931 0.03998274 0.25274647
## 5 mom_vocd_intercept 34.2465577 0.58022077 0.01694246
# CHILDES: same summary over the same five mother intercept columns.
childes_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept, mom_mlu_intercept
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.6790541 0.02417634 0.03560297
## 2 mom_mlu_intercept 4.4878401 0.79098088 0.17624979
## 3 mom_mtld_intercept 34.0778889 5.28812348 0.15517755
## 4 mom_ttr_intercept 0.3143275 0.14994716 0.47704122
## 5 mom_vocd_intercept 32.7165193 0.50013578 0.01528695
# Histograms of mothers' intercepts, one facet per measure with a free x axis.
# LDP
ldp_intercept %>%
  gather(
    key = measure, value = value,
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )
# CHILDES
childes_intercept %>%
  gather(
    key = measure, value = value,
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept, mom_mlu_intercept
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# Variance of mother's slope ----
# LDP: stack the mother slope columns into long form (measure, value) and
# report mean, SD and coefficient of variation (sd / mean) per measure.
# Fix: the fifth column is now mom_mlu_slope. The call previously gathered
# mom_mlu_intercept, which put an intercept row into this slope table.
ldp_intercept %>%
  ungroup(.) %>%
  gather(
    measure, value,
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## NOTE: the table below was recorded before mom_mlu_intercept was replaced by
## mom_mlu_slope in the call above; row 2 is stale until this is re-run.
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.06425911 0.01594960 0.24820767
## 2 mom_mlu_intercept 4.09649522 0.40832536 0.09967676
## 3 mom_mtld_slope 14.74536342 2.79757874 0.18972599
## 4 mom_ttr_slope 0.04673040 0.05086041 1.08837952
## 5 mom_vocd_slope 0.58692547 0.22829190 0.38896234
# CHILDES: mean, SD and coefficient of variation (sd / mean) of each mother
# slope column, computed after stacking the columns into long form.
childes_intercept %>%
  ungroup(.) %>%
  gather(
    key = measure, value = value,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    coef_of_var = sd / mean
  )
## # A tibble: 5 x 4
## measure mean sd coef_of_var
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.04819749 0.03164890 0.6566504
## 2 mom_mlu_slope 0.81206158 0.76783470 0.9455375
## 3 mom_mtld_slope 9.79313574 3.33620713 0.3406679
## 4 mom_ttr_slope 0.05391822 0.03679039 0.6823369
## 5 mom_vocd_slope 0.33546011 0.44490717 1.3262595
# Histograms of mothers' slopes, one facet per measure with a free x axis.
# LDP
ldp_intercept %>%
  gather(
    key = measure, value = value,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )
# CHILDES
childes_intercept %>%
  gather(
    key = measure, value = value,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope,
    mom_ttr_slope, mom_mlu_slope
  ) %>%
  ggplot(mapping = aes(x = value)) +
  facet_grid(. ~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )
# Correlation plot of child's intercept ----
# Rows with an NA in ANY column of the table are dropped before the columns
# are selected, so the correlations use fully complete rows only.
# LDP (drawn as squares)
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept, ppvt_intercept, mtld_intercept,
    mattr_intercept, vocd_intercept, ttr_intercept,
    mlu_intercept, sen_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# CHILDES (drawn as numbers)
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept, mattr_intercept, vocd_intercept,
    ttr_intercept, mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# Correlation plot of mother's intercept ----
# Rows with an NA in any column of the table are dropped before selecting.
# LDP
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# Correlation plot of child's slope ----
# Rows with an NA in any column of the table are dropped before selecting.
# LDP (drawn as squares)
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, sen_slope, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# CHILDES (drawn as numbers)
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# Correlation plot of mother's slope ----
# Rows with an NA in any column of the table are dropped before selecting.
# LDP: mom_mlu_slope is now included so this plot covers the same five mother
# slopes as the CHILDES plot below (the LDP selection previously stopped at
# mom_ttr_slope).
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")
# Plot all parameters of children ----
# Correlations among child intercepts and slopes together. Rows with an NA in
# any column of the table are dropped before selecting.
# LDP
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept, ppvt_intercept, mtld_intercept, mattr_intercept,
    vocd_intercept, ttr_intercept, mlu_intercept, sen_intercept,
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope, vocd_slope,
    ttr_slope, mlu_slope, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# CHILDES
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope,
    mlu_intercept, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# Plot all parameters of mothers ----
# Correlations among mother intercepts and slopes together. Rows with an NA in
# any column of the table are dropped before selecting.
# LDP
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept, mom_vocd_intercept,
    mom_ttr_intercept, mom_mlu_intercept, mom_mtld_slope,
    mom_mattr_slope, mom_vocd_slope, mom_ttr_slope, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# CHILDES
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope,
    mom_mlu_intercept, mom_mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# Plot parameters of child and mother ----
# Correlations between mother and child intercepts/slopes in one matrix.
# Rows with an NA in any column of the table are dropped before selecting.
# LDP (the cdi_* and ppvt_* columns are not part of this selection)
ldp_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mlu_intercept, mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope, mom_mlu_slope,
    mtld_intercept, mtld_slope,
    mattr_intercept, mattr_slope,
    vocd_intercept, vocd_slope,
    ttr_intercept, ttr_slope,
    mlu_intercept, mlu_slope,
    sen_intercept, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
# CHILDES
childes_intercept %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope,
    mom_vocd_slope, mom_ttr_slope,
    mom_mlu_intercept, mom_mlu_slope,
    mtld_intercept, mtld_slope,
    mattr_intercept, mattr_slope,
    vocd_intercept, vocd_slope,
    ttr_intercept, ttr_slope,
    mlu_intercept, mlu_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")